/*
 *    This program is free software; you can redistribute it and/or modify
 *    it under the terms of the GNU General Public License as published by
 *    the Free Software Foundation; either version 2 of the License, or
 *    (at your option) any later version.
 *
 *    This program is distributed in the hope that it will be useful,
 *    but WITHOUT ANY WARRANTY; without even the implied warranty of
 *    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *    GNU General Public License for more details.
 *
 *    You should have received a copy of the GNU General Public License
 *    along with this program; if not, write to the Free Software
 *    Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
 */

/*
 *    NominalToBinary.java
 *    Copyright (C) 1999 University of Waikato, Hamilton, New Zealand
 *
 */


package en_deep.mlprocess.manipulation;

import java.util.ArrayList;
import weka.core.Attribute;
import weka.core.Capabilities;
import weka.core.FastVector;
import weka.core.Instance;
import weka.core.DenseInstance;
import weka.core.Instances;
import weka.core.Option;
import weka.core.OptionHandler;
import weka.core.Range;
import weka.core.RevisionUtils;
import weka.core.SparseInstance;
import weka.core.Utils;
import weka.core.Capabilities.Capability;
import weka.filters.Filter;
import weka.filters.UnsupervisedFilter;

import java.util.Enumeration;
import java.util.HashMap;
import java.util.Vector;

/**
 <!-- globalinfo-start -->
 * Converts all nominal attributes into binary numeric attributes while respecting
 * attributes whose values may encode sets of values.
 * <p/>
 * In set-aware mode each nominal value is split by the separator (option '-S') and one
 * binary attribute named <code>attribute&gt;member</code> is created for every distinct
 * member found among the values of the attribute. In plain mode one binary attribute
 * named <code>attribute=value</code> is created for every nominal value. Plain mode is
 * used in dual mode (option '-D') and for attributes not matching the set-only prefix
 * (option '-P'); set-aware mode is used when no prefix is given or when the attribute
 * name starts with the prefix.
 * <p/>
 * Attributes with at most two values are left as single attributes if option '-A' is
 * not given. The class attribute is never converted.
 * <p/>
 <!-- globalinfo-end -->
 *
 <!-- options-start -->
 * Valid options are: <p/>
 *
 * <pre> -N
 *  Sets if binary attributes are to be coded as nominal ones.</pre>
 *
 * <pre> -A
 *  For each nominal value a new attribute is created,
 *  not only if there are more than 2 values.</pre>
 *
 * <pre> -R &lt;col1,col2-col4,...&gt;
 *  Specifies list of columns to act on. First and last are
 *  valid indexes.
 *  (default: first-last)</pre>
 *
 * <pre> -V
 *  Invert matching sense of column indexes.</pre>
 *
 * <pre> -S &lt;sep_char&gt;
 *  Multiple-value separator string (default: one space).
 *  The string is handed to String.split(), i.e. it is read as a
 *  regular expression.</pre>
 *
 * <pre> -D
 *  Dual mode (non-set-aware and set-aware output for each attribute,
 *  i.e. some values will appear twice).</pre>
 *
 * <pre> -P &lt;prefix&gt;
 *  If non-empty, only the attributes whose name starts with the
 *  given string will be affected by the set-aware mode.</pre>
 *
 <!-- options-end -->
 *
 */
public class SetAwareNominalToBinary
  extends Filter
  implements UnsupervisedFilter, OptionHandler {

  /** for serialization */
  static final long serialVersionUID = -1130642825744549138L;

  /** Default separator of set values */
  private static final String DEFAULT_SEPARATOR = " ";

  /** Stores which columns to act on */
  protected Range m_Columns = new Range();

  /** Are the new attributes going to be numeric (true) or nominal f/t (false) ones? */
  private boolean m_Numeric = true;

  /** Are all values transformed into new attributes (even for attributes with <= 2 values)? */
  private boolean m_TransformAll = false;

  /** Dual mode (set-aware and normal output) */
  private boolean m_DualMode = false;

  /** Apply set mode only on the attributes with the given prefix (null = apply to all) */
  private String m_SetOnlyPrefix = null;

  /** The separator string for set values; never null or empty */
  private String m_Separator = DEFAULT_SEPARATOR;

  /**
   * For each input attribute converted in set-aware mode (indexed by the input attribute
   * index): the set members found among its values, each mapped to the position of its
   * output attribute relative to the first set-mode output attribute of that input
   * attribute. Entries of other input attributes stay null.
   */
  private HashMap<String,Integer> [] m_producedAttVals;

  /** Constructor - initialises the filter to act on all columns. */
  public SetAwareNominalToBinary() {

    setAttributeIndices("first-last");
  }

  /**
   * Returns a string describing this filter
   *
   * @return a description of the filter suitable for
   * displaying in the explorer/experimenter gui
   */
  public String globalInfo() {

    return "Converts all nominal attributes into binary numeric attributes while "
            + "respecting attributes that may contain sets of values.";
  }

  /**
   * Returns the Capabilities of this filter: all attribute and class types,
   * missing values, missing class values and data without a class are accepted.
   *
   * @return            the capabilities of this object
   * @see               Capabilities
   */
  @Override
  public Capabilities getCapabilities() {
    Capabilities result = super.getCapabilities();
    result.disableAll();

    // attributes
    result.enableAllAttributes();
    result.enable(Capability.MISSING_VALUES);

    // class
    result.enableAllClasses();
    result.enable(Capability.MISSING_CLASS_VALUES);
    result.enable(Capability.NO_CLASS);

    return result;
  }

  /**
   * Sets the format of the input instances and immediately derives the
   * output format from it.
   *
   * @param instanceInfo an Instances object containing the input
   * instance structure (any instances contained in the object are
   * ignored - only the structure is required).
   * @return true, as the outputFormat may be collected immediately
   * @throws Exception if the input format can't be set
   * successfully
   */
  @Override
  public boolean setInputFormat(Instances instanceInfo)
       throws Exception {

    super.setInputFormat(instanceInfo);

    m_Columns.setUpper(instanceInfo.numAttributes() - 1);

    setOutputFormat();
    return true;
  }

  /**
   * Input an instance for filtering. The instance is converted right away
   * and pushed to the output queue.
   *
   * @param instance the input instance
   * @return true, as the filtered instance may be collected with output()
   * immediately
   * @throws IllegalStateException if no input format has been set
   */
  @Override
  public boolean input(Instance instance) {

    if (getInputFormat() == null) {
      throw new IllegalStateException("No input instance format defined");
    }
    if (m_NewBatch) {
      resetQueue();
      m_NewBatch = false;
    }

    convertInstance(instance);
    return true;
  }

  /**
   * Returns an enumeration describing the available options.
   *
   * @return an enumeration of all the available options.
   */
  @Override
  public Enumeration listOptions() {

    Vector<Option> newVector = new Vector<Option>(7);

    newVector.addElement(new Option(
        "\tSets if binary attributes are to be coded as nominal ones.",
        "N", 0, "-N"));

    newVector.addElement(new Option(
        "\tFor each nominal value a new attribute is created, \n"
        + "\tnot only if there are more than 2 values.",
        "A", 0, "-A"));

    newVector.addElement(new Option(
        "\tSpecifies list of columns to act on. First and last are \n"
        + "\tvalid indexes.\n"
        + "\t(default: first-last)",
        "R", 1, "-R <col1,col2-col4,...>"));

    newVector.addElement(new Option(
        "\tInvert matching sense of column indexes.",
        "V", 0, "-V"));

    // the synopsis used to read "-V <sep_char>", which advertised the wrong flag
    newVector.addElement(new Option(
        "\tThe separator for set values.",
        "S", 1, "-S <sep_char>"));

    newVector.addElement(new Option(
        "\tDual mode (non-set-aware and set-aware).",
        "D", 0, "-D"));

    newVector.addElement(new Option(
        "\tIf non-empty, only the attributes whose name starts with the\n"
        + "\tgiven string will be affected by the set-aware mode.",
        "P", 1, "-P <prefix>"));

    return newVector.elements();
  }


  /**
   * Parses a given list of options. Options that are not given are reset to
   * their defaults. If an input format has already been set, it is set again
   * so that the output format reflects the new options. <p/>
   *
   <!-- options-start -->
   * Valid options are: <p/>
   *
   * <pre> -N
   *  Sets if binary attributes are to be coded as nominal ones.</pre>
   *
   * <pre> -A
   *  For each nominal value a new attribute is created,
   *  not only if there are more than 2 values.</pre>
   *
   * <pre> -R &lt;col1,col2-col4,...&gt;
   *  Specifies list of columns to act on. First and last are
   *  valid indexes.
   *  (default: first-last)</pre>
   *
   * <pre> -V
   *  Invert matching sense of column indexes.</pre>
   *
   * <pre> -S &lt;sep_char&gt;
   *  Multiple-value separator string (default: one space).</pre>
   *
   * <pre> -D
   *  Dual mode (non-set-aware and set-aware output).</pre>
   *
   * <pre> -P &lt;prefix&gt;
   *  If non-empty, only the attributes whose name starts with the
   *  given string will be affected by the set-aware mode.</pre>
   *
    <!-- options-end -->
   *
   * @param options the list of options as an array of strings
   * @throws Exception if an option is not supported
   */
  @Override
  public void setOptions(String[] options) throws Exception {

    setBinaryAttributesNominal(Utils.getFlag('N', options));

    setTransformAllValues(Utils.getFlag('A', options));

    String convertList = Utils.getOption('R', options);
    if (convertList.length() != 0) {
      setAttributeIndices(convertList);
    } else {
      setAttributeIndices("first-last");
    }
    setInvertSelection(Utils.getFlag('V', options));

    setDualMode(Utils.getFlag('D', options));

    // an absent -S yields an empty string; setSeparator() maps that to the default
    setSeparator(Utils.getOption('S', options));

    // an absent -P yields an empty string; setSetOnlyPrefix() maps that to "no prefix"
    setSetOnlyPrefix(Utils.getOption('P', options));

    if (getInputFormat() != null) {
      setInputFormat(getInputFormat());
    }
  }

  /**
   * Gets the current settings of the filter.
   *
   * @return an array of strings suitable for passing to setOptions
   */
  @Override
  public String [] getOptions() {

    // Up to ten entries are possible (-N -A -R x -V -S x -P x -D), so the options
    // are collected in a list instead of a fixed-size array.
    ArrayList<String> options = new ArrayList<String>();

    if (getBinaryAttributesNominal()) {
      options.add("-N");
    }

    if (getTransformAllValues()) {
      options.add("-A");
    }

    if (!getAttributeIndices().equals("")) {
      options.add("-R");
      options.add(getAttributeIndices());
    }
    if (getInvertSelection()) {
      options.add("-V");
    }
    if (!getSeparator().equals(DEFAULT_SEPARATOR)) {
      options.add("-S");
      options.add(getSeparator());
    }
    if (getSetOnlyPrefix() != null) {
      options.add("-P");
      options.add(getSetOnlyPrefix());
    }
    if (getDualMode()) {
      options.add("-D");
    }

    return options.toArray(new String[options.size()]);
  }

  /**
   * Returns the tip text for this property
   *
   * @return tip text for this property suitable for
   * displaying in the explorer/experimenter gui
   */
  public String binaryAttributesNominalTipText() {
    return "Whether resulting binary attributes will be nominal.";
  }

  /**
   * Gets if binary attributes are to be treated as nominal ones.
   *
   * @return true if binary attributes are to be treated as nominal ones
   */
  public boolean getBinaryAttributesNominal() {

    return !m_Numeric;
  }

  /**
   * Sets if binary attributes are to be treated as nominal ones.
   *
   * @param bool true if binary attributes are to be treated as nominal ones
   */
  public void setBinaryAttributesNominal(boolean bool) {

    m_Numeric = !bool;
  }

  /**
   * Returns the tip text for this property
   *
   * @return tip text for this property suitable for
   * displaying in the explorer/experimenter gui
   */
  public String transformAllValuesTipText() {
    return "Whether all nominal values are turned into new attributes, not only if there are more than 2.";
  }

  /**
   * Gets if all nominal values are turned into new attributes, not only if
   * there are more than 2.
   *
   * @return true all nominal values are transformed into new attributes
   */
  public boolean getTransformAllValues() {

    return m_TransformAll;
  }

  /**
   * Sets whether all nominal values are transformed into new attributes, not
   * just if there are more than 2.
   *
   * @param bool true if all nominal value are transformed into new attributes
   */
  public void setTransformAllValues(boolean bool) {

    m_TransformAll = bool;
  }

  /**
   * Returns the tip text for this property
   *
   * @return tip text for this property suitable for
   * displaying in the explorer/experimenter gui
   */
  public String invertSelectionTipText() {

    return "Set attribute selection mode. If false, only selected"
      + " (nominal) attributes in the range will be converted; if"
      + " true, only non-selected attributes will be converted.";
  }

  /**
   * Gets whether the column selection is inverted.
   *
   * @return true if the columns outside the given range are acted on
   */
  public boolean getInvertSelection() {

    return m_Columns.getInvert();
  }

  /**
   * Sets whether the column selection is inverted. If false, the columns in
   * the given range are acted on; if true, the columns outside of it.
   *
   * @param invert the new invert setting
   */
  public void setInvertSelection(boolean invert) {

    m_Columns.setInvert(invert);
  }

  /**
   * Returns the tip text for this property
   *
   * @return tip text for this property suitable for
   * displaying in the explorer/experimenter gui
   */
  public String attributeIndicesTipText() {
    return "Specify range of attributes to act on."
      + " This is a comma separated list of attribute indices, with"
      + " \"first\" and \"last\" valid values. Specify an inclusive"
      + " range with \"-\". E.g: \"first-3,5,6-10,last\".";
  }

  /**
   * Gets the current range selection
   *
   * @return a string containing a comma separated list of ranges
   */
  public String getAttributeIndices() {

    return m_Columns.getRanges();
  }

  /**
   * Sets which attributes are to be acted on.
   *
   * @param rangeList a string representing the list of attributes. Since
   * the string will typically come from a user, attributes are indexed from
   * 1. <br>
   * eg: first-3,5,6-last
   * @throws IllegalArgumentException if an invalid range list is supplied
   */
  public void setAttributeIndices(String rangeList) {

    m_Columns.setRanges(rangeList);
  }

  /**
   * Tells whether an input attribute is copied to the output unchanged: this is
   * the case for non-nominal attributes, the class attribute and attributes
   * outside the selected column range.
   *
   * @param att the input attribute
   * @param index the index of the attribute in the input format
   * @return true if the attribute is not subject to conversion
   */
  private boolean isPassedThrough(Attribute att, int index) {
    return !att.isNominal()
        || index == getInputFormat().classIndex()
        || !m_Columns.isInRange(index);
  }

  /**
   * Tells whether a convertible attribute stays a single output attribute
   * because it has at most two values and '-A' is not in effect.
   *
   * @param att the input attribute
   * @return true if the attribute is represented by one output attribute
   */
  private boolean isKeptSingle(Attribute att) {
    return att.numValues() <= 2 && !m_TransformAll;
  }

  /**
   * Tells whether plain (one output attribute per nominal value) output is
   * produced for the attribute: in dual mode, or if a set-only prefix is given
   * and the attribute name does not start with it.
   *
   * @param att the input attribute
   * @return true if plain output attributes are produced
   */
  private boolean usesPlainMode(Attribute att) {
    return m_DualMode
        || (m_SetOnlyPrefix != null && !att.name().startsWith(m_SetOnlyPrefix));
  }

  /**
   * Tells whether set-aware (one output attribute per set member) output is
   * produced for the attribute: if no set-only prefix is given, or if the
   * attribute name starts with it.
   *
   * @param att the input attribute
   * @return true if set-aware output attributes are produced
   */
  private boolean usesSetMode(Attribute att) {
    return m_SetOnlyPrefix == null || att.name().startsWith(m_SetOnlyPrefix);
  }

  /**
   * Computes the output format from the input format and the current options,
   * and records the set members of all set-aware attributes on the way.
   */
  private void setOutputFormat() {

    Instances inputFormat = getInputFormat();
    int classIndex = inputFormat.classIndex();
    int newClassIndex = classIndex;
    FastVector newAtts = new FastVector();

    // generic arrays cannot be created directly; the raw array only ever holds
    // HashMap<String, Integer> instances created in convertAttribute()
    @SuppressWarnings("unchecked")
    HashMap<String, Integer>[] producedVals = new HashMap[inputFormat.numAttributes()];
    m_producedAttVals = producedVals;

    for (int j = 0; j < inputFormat.numAttributes(); j++) {
      Attribute att = inputFormat.attribute(j);

      if (isPassedThrough(att, j)) {
        newAtts.addElement(att.copy());
      } else if (isKeptSingle(att)) {
        if (m_Numeric) {
          // numeric attribute of the same name; the value index becomes the value
          newAtts.addElement(new Attribute(att.name()));
        } else {
          newAtts.addElement(att.copy());
        }
      } else {
        ArrayList<Attribute> valueAttrs = convertAttribute(att);

        // attributes in front of the class shift the class index
        if (newClassIndex >= 0 && j < classIndex) {
          newClassIndex += valueAttrs.size() - 1;
        }
        newAtts.addAll(valueAttrs);
      }
    }

    Instances outputFormat = new Instances(inputFormat.relationName(), newAtts, 0);
    outputFormat.setClassIndex(newClassIndex);
    setOutputFormat(outputFormat);
  }

  /**
   * Converts a single instance according to the output format. The converted
   * instance is added to the end of the output queue.
   *
   * @param instance the instance to convert
   */
  private void convertInstance(Instance instance) {

    double [] vals = new double [outputFormatPeek().numAttributes()];
    int attSoFar = 0;
    Instances inputFormat = getInputFormat();

    for (int j = 0; j < inputFormat.numAttributes(); j++) {
      Attribute att = inputFormat.attribute(j);

      if (isPassedThrough(att, j) || isKeptSingle(att)) {
        // one input column maps to exactly one output column
        vals[attSoFar] = instance.value(j);
        attSoFar++;
      } else {
        attSoFar += setConvertedAttribute(att, instance.value(j), vals, attSoFar);
      }
    }

    Instance inst = null;
    if (instance instanceof SparseInstance) {
      inst = new SparseInstance(instance.weight(), vals);
    } else {
      inst = new DenseInstance(instance.weight(), vals);
    }
    inst.setDataset(getOutputFormat());
    copyValues(inst, false, instance.dataset(), getOutputFormat());
    inst.setDataset(getOutputFormat());
    push(inst);
  }

  /**
   * Returns the revision string.
   *
   * @return		the revision
   */
  @Override
  public String getRevision() {
    return RevisionUtils.extract("$Revision: 5987 $");
  }

  /**
   * Main method for testing this class.
   *
   * @param argv should contain arguments to the filter:
   * use -h for help
   */
  public static void main(String [] argv) {
    runFilter(new SetAwareNominalToBinary(), argv);
  }

  /**
   * Returns the tip text for this property
   *
   * @return tip text for this property suitable for
   * displaying in the explorer/experimenter gui
   */
  public String separatorTipText() {
    return "The separator used to split a nominal value into the members of a set"
      + " (interpreted as a regular expression).";
  }

  /**
   * Returns the separator string for set values.
   *
   * @return the currently set separator string for set values
   */
  public String getSeparator() {
    return m_Separator;
  }

  /**
   * Sets a new separator string for set values. The separator is passed to
   * {@link String#split(String)} when values are split, i.e. it is interpreted
   * as a regular expression.
   *
   * @param separator the new separator; null or an empty string selects the
   * default separator (one space), since an empty separator would split every
   * value into single characters
   */
  public void setSeparator(String separator) {
    if (separator == null || separator.length() == 0) {
      separator = DEFAULT_SEPARATOR;
    }
    m_Separator = separator;
  }

  /**
   * Finds all possible values for the given attribute while respecting its possible
   * set structure and returns them as a list of new binary (or nominal f/t) attributes.
   * Plain-mode attributes (named <tt>name=value</tt>) come first, followed by set-mode
   * attributes (named <tt>name&gt;member</tt>) in order of first occurrence of the member.
   * For set-mode attributes, the members are recorded in {@link #m_producedAttVals}.
   *
   * @param att the attribute to be converted
   * @return a list of output attributes for this attribute
   */
  private ArrayList<Attribute> convertAttribute(Attribute att) {

    ArrayList<Attribute> newAtts = new ArrayList<Attribute>();

    // Plain mode: one output attribute per nominal value
    if (usesPlainMode(att)) {
      for (int k = 0; k < att.numValues(); ++k) {
        newAtts.add(makeBinaryAttribute(att.name() + "=" + att.value(k)));
      }
    }

    // Set mode: one output attribute per distinct set member
    if (usesSetMode(att)) {
      HashMap<String, Integer> members = new HashMap<String, Integer>();
      m_producedAttVals[att.index()] = members;

      for (int k = 0; k < att.numValues(); k++) {
        for (String setVal : att.value(k).split(m_Separator)) {
          if (members.containsKey(setVal)) {
            continue;
          }
          // the member's position among the set-mode outputs of this attribute
          members.put(setVal, members.size());
          newAtts.add(makeBinaryAttribute(att.name() + ">" + setVal));
        }
      }
    }

    return newAtts;
  }

  /**
   * Sets the values for all binary attributes pertaining to the given source attribute with
   * respect to possible multiple values (and normal setting, if plain mode applies to the
   * attribute). If the source value is missing, all pertaining output values are set to
   * missing as well.
   *
   * @param att the source attribute
   * @param value the source value (index of the nominal value, or the missing value)
   * @param vals the field where the values are to be stored
   * @param offset the offset where the values for this attribute should begin
   * @return number of processed output columns
   */
  private int setConvertedAttribute(Attribute att, double value, double[] vals, int offset) {

    boolean plainMode = usesPlainMode(att);
    boolean setMode = usesSetMode(att);
    int totalValues = 0;

    if (plainMode) {
      totalValues += att.numValues();
    }
    if (setMode) {
      totalValues += m_producedAttVals[att.index()].size();
    }

    // check for a missing value before the value is used as an index
    if (Utils.isMissingValue(value)) {
      for (int i = 0; i < totalValues; ++i) {
        vals[offset + i] = value;
      }
      return totalValues;
    }

    if (plainMode) {
      vals[offset + (int) value] = 1;
      offset += att.numValues(); // set-mode columns follow the plain-mode ones
    }
    if (setMode) {
      HashMap<String, Integer> members = m_producedAttVals[att.index()];

      for (String setVal : att.value((int) value).split(m_Separator)) {
        vals[offset + members.get(setVal)] = 1;
      }
    }
    return totalValues;
  }

  /**
   * Returns the tip text for this property
   *
   * @return tip text for this property suitable for
   * displaying in the explorer/experimenter gui
   */
  public String dualModeTipText() {
    return "Whether both non-set-aware and set-aware output is produced for each attribute.";
  }

  /**
   * Returns true if the dual mode setting is in effect.
   *
   * @return true if the dual mode is enabled
   */
  public boolean getDualMode() {
    return this.m_DualMode;
  }

  /**
   * Sets dual mode (set-aware + normal) on/off.
   *
   * @param dualMode new value of dualMode mode
   */
  public void setDualMode(boolean dualMode) {
    this.m_DualMode = dualMode;
  }

  /**
   * Creates one output attribute of the configured kind: a numeric one, or a
   * nominal one with the values <tt>f</tt> and <tt>t</tt> if binary attributes
   * are to be nominal.
   *
   * @param attributeName the desired attribute name
   * @return the new output attribute
   */
  private Attribute makeBinaryAttribute(String attributeName) {
    return m_Numeric ? new Attribute(attributeName) : makeNominalAttribute(attributeName);
  }

  /**
   * Create a nominal binary attribute with the given name and two values <tt>f</tt> and <tt>t</tt>.
   *
   * @param attributeName the desired attribute name
   * @return the new nominal binary attribute
   */
  private Attribute makeNominalAttribute(String attributeName) {

    ArrayList<String> binVals = new ArrayList<String>(2);

    // order matters: index 0 = "f", index 1 = "t", matching the 0/1 values written
    binVals.add("f");
    binVals.add("t");
    return new Attribute(attributeName, binVals);
  }

  /**
   * Returns the tip text for this property
   *
   * @return tip text for this property suitable for
   * displaying in the explorer/experimenter gui
   */
  public String setOnlyPrefixTipText() {
    return "If non-empty, only the attributes whose name starts with the given string"
      + " will be affected by the set-aware mode.";
  }

  /**
   * Sets the set-only prefix of attributes.
   *
   * @param setOnlyPrefix the new value; an empty string or null means that
   * no prefix is used
   */
  public void setSetOnlyPrefix(String setOnlyPrefix) {
    if ("".equals(setOnlyPrefix)) {
      setOnlyPrefix = null;
    }
    this.m_SetOnlyPrefix = setOnlyPrefix;
  }

  /**
   * Returns the current set-only prefix for attributes.
   *
   * @return the current value of the set-only attribute prefix, or null if
   * no prefix is used
   */
  public String getSetOnlyPrefix() {
    return this.m_SetOnlyPrefix;
  }

}
